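/*
Preparation for experiment: reads a dataset, adds heavy-tailed (Cauchy) noise
to a random subset of its points, and writes the result to perturbedDataset.txt.
*/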

#include "head.h"
#include "tools.h"
#include "function.h"
#include "kmeans.h"
#include "coreset.h"

// Arithmetic mean of the values in a.
// Returns 0 for an empty vector instead of evaluating 0/0 (NaN).
// The argument is taken by const reference so the caller's data is not copied.
double Mean(const vector<double>& a) {
	if (a.empty()) return 0;
	double sum = 0;
	for (double x : a) sum += x;
	return sum / static_cast<double>(a.size());
}

// Largest value in a.
// The running maximum is seeded with the first element rather than 0, so
// inputs whose values are all negative yield their true maximum.
// Returns 0 for an empty vector (same result as before for that case).
double Max(const vector<double>& a) {
	if (a.empty()) return 0;
	double ret = a.front();
	for (double x : a) ret = max(ret, x);
	return ret;
}

// Smallest value in a.
// The running minimum is seeded with the first element rather than 1, so
// inputs whose values are all greater than 1 yield their true minimum.
// Returns 1 for an empty vector (same result as before for that case).
double Min(const vector<double>& a) {
	if (a.empty()) return 1;
	double ret = a.front();
	for (double x : a) ret = min(ret, x);
	return ret;
}

// Population variance of the values in a: the mean squared deviation from
// Mean(a), divided by the number of elements (not n - 1).
// Returns 0 for an empty vector instead of evaluating 0/0 (NaN).
// The argument is taken by const reference so the caller's data is not copied.
double Var(const vector<double>& a) {
	if (a.empty()) return 0;
	const double ave = Mean(a);
	double ret = 0;
	for (double x : a) {
		const double diff = x - ave;
		ret += diff * diff;
	}
	return ret / static_cast<double>(a.size());
}

// Adds Cauchy(0, 1) noise to a randomly chosen subset of the points in X.
//
// X          dataset to modify in place; element 0 of every point is left
//            untouched (the bias/dummy term), all other coordinates of a
//            chosen point receive independent noise.
// x_percent  fraction of points to perturb, e.g. 0.1 perturbs 10% of them
//            (despite the name it is a fraction, not a percentage).
//            Values <= 0 (or NaN) perturb nothing; values >= 1 perturb
//            every point.
//
// The generator is seeded from the system clock, so results differ between runs.
void perturb_heavy_tailed(dataset& X, double x_percent) {
	const size_t n = X.size();
	if (n == 0) return;

	// Reject non-positive and NaN fractions up front: converting a negative
	// double to size_t is undefined behaviour.
	if (!(x_percent > 0.0)) return;

	// Clamp to n so that the loop below never reads past the end of `indices`.
	const size_t num_to_perturb =
		(x_percent >= 1.0) ? n : std::min(n, static_cast<size_t>(n * x_percent));

	// Setup random generators
	const unsigned seed = static_cast<unsigned>(
		std::chrono::system_clock::now().time_since_epoch().count());
	std::default_random_engine rng(seed);
	std::cauchy_distribution<double> cauchy_noise(0.0, 1.0);

	// Create and shuffle indices; the first num_to_perturb entries are the
	// points that get perturbed.
	std::vector<size_t> indices(n);
	for (size_t i = 0; i < n; ++i) indices[i] = i;
	std::shuffle(indices.begin(), indices.end(), rng);

	// Perturb chosen points (ignore x[0], the bias/dummy term)
	for (size_t k = 0; k < num_to_perturb; ++k) {
		auto& point = X[indices[k]];
		// Bound by this point's own length so a row shorter than X[0]
		// cannot be indexed out of range.
		const size_t dim = point.size();
		for (size_t j = 1; j < dim; ++j) {
			point[j] += cauchy_noise(rng);
		}
	}
}

// Entry point.
//
// Usage: <program> data_name data_size data_dim
//   data_name  path of a text file holding data_size * data_dim
//              whitespace-separated real numbers
//   data_size  number of points N
//   data_dim   number of coordinates D per point
//
// Reads the N points (each stored with a leading constant 1 at index 0),
// perturbs 10% of them with perturb_heavy_tailed, and writes coordinates
// 1..D of every point to "perturbedDataset.txt", one point per line.
//
// Returns 0 on success and 1 on any error (bad arguments, unreadable or
// truncated input, output file cannot be created).
int main(int argc, char** argv) {
	if (argc < 4) {
		printf("3 arguments are required (data_name, data_size, data_dim).");
		return 1;
	}

	const int N = atoi(argv[2]);
	const int D = atoi(argv[3]);
	// atoi yields 0 for unparsable text, and a negative D would make the
	// per-point buffer size invalid, so reject anything non-positive.
	if (N <= 0 || D <= 0) {
		cerr << "data_size and data_dim must be positive integers." << endl;
		return 1;
	}

	FILE* FIN = fopen(argv[1], "r");
	if (FIN == nullptr) {
		cerr << "Cannot open input file: " << argv[1] << endl;
		return 1;
	}

	dataset X;
	cerr << "-----------Reading Data-----------" << endl;
	for (int i = 1; i <= N; i++) {
		datapoint x; x.resize(D + 1);
		x[0] = 1;
		for (int j = 1; j <= D; j++) {
			// Stop on short or malformed input rather than silently
			// keeping zero-filled coordinates.
			if (fscanf(FIN, "%lf", &x[j]) != 1) {
				cerr << "Failed to read coordinate " << j << " of point " << i
				     << " from " << argv[1] << endl;
				fclose(FIN);
				return 1;
			}
		}
		X.push_back(x);
	}
	fclose(FIN);
	cerr << "-----------Reading Over-----------" << endl;

	perturb_heavy_tailed(X, 0.1);

	cerr << "-----------Generating New Dataset-----------" << endl;
	FILE* NEWDATASET = fopen("perturbedDataset.txt", "w");
	if (NEWDATASET == nullptr) {
		cerr << "Cannot open output file: perturbedDataset.txt" << endl;
		return 1;
	}
	for (int i = 0; i < N; i++) {
		// Index 0 is the constant bias term and is not written back.
		for (int j = 1; j <= D; j++)
			fprintf(NEWDATASET, "%lf ", X[i][j]);
		fprintf(NEWDATASET, "\n");
	}
	fclose(NEWDATASET);
	cerr << "-----------Generating End-----------" << endl;
	return 0;
}